import pandas as pd
import numpy as np
# Load the keto-diet tweets from the Excel workbook and preview them.
df = pd.read_excel("TweetsKetoDiet.xlsx")
df.head()

# Clean the dataset: discard every row that has a missing value in any
# column, then preview the first five remaining rows.
df = df.dropna()
df.head()
| | Unnamed: 0 | User | Date Created | Number of Likes | Source of Tweet | Tweet |
|---|---|---|---|---|---|---|
| 0 | 0 | ketotipslowcarb | 2023-01-09 23:58:29 | 0 | Twitter for Android | Powerful New Formula Triggers Fat-Burning Keto... |
| 1 | 1 | gimhuij44546290 | 2023-01-09 23:53:04 | 0 | Jetpack.com | Keto Diet vs Mediterranean Diet – Which Is Bet... |
| 2 | 2 | Americareform | 2023-01-09 23:50:14 | 1 | Twitter Web App | @UltraHottie2000 I understand and look up the ... |
| 3 | 3 | HealthLineups | 2023-01-09 23:48:47 | 2 | Twitter for iPhone | Eating Keto is simple 🥑🥘 Fewer carbs, more hea... |
| 4 | 4 | HealthLineups | 2023-01-09 23:47:55 | 1 | Twitter for iPhone | We're so grateful to all of our customers who ... |
import re
# Work on a single-column frame that holds only the tweet text.
tweets_to_df = df['Tweet'].to_frame()
tweets_to_df.head()
# Clean the tweets with a function
def cleanTweets(text):
    """Strip Twitter-specific noise from a single tweet.

    Removes, in this order: @mentions, '#' symbols (the hashtag word itself
    is kept), the retweet marker ``RT`` together with the whitespace that
    follows it, and http/https URLs. Finally every newline is replaced by a
    space. No other whitespace is normalised, so removed tokens can leave
    consecutive spaces behind.

    Args:
        text: The raw tweet text (a ``str``).

    Returns:
        The cleaned tweet text.
    """
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)  # removes @mentions
    text = re.sub(r'#', '', text)  # removes the hashtag '#' symbol only
    # \b anchors RT at a word start so the tail of capitalised words
    # (e.g. "SMART choice") is not mistaken for a retweet marker.
    text = re.sub(r'\bRT\s+', '', text)
    text = re.sub(r'https?://\S+', '', text)  # removes links
    text = re.sub('\n', ' ', text)  # newlines become spaces
    return text
# Store the cleaned text next to the raw tweets, then show the last rows
# to compare the original tweets with their cleaned versions.
tweets_to_df['cleanedTweets'] = df['Tweet'].map(cleanTweets)
tweets_to_df.tail()
| | Tweet | cleanedTweets |
|---|---|---|
| 4996 | The Healthy Mediterranean Keto Diet Cookbook :... | The Healthy Mediterranean Keto Diet Cookbook :... |
| 4997 | @mikeinspiresme so true #ketodiet #Health #tre... | so true ketodiet Health trend RecipeOfTheDay ... |
| 4998 | Keto Diet Cookbook For Beginners: 550 Craveabl... | Keto Diet Cookbook For Beginners: 550 Craveabl... |
| 4999 | @geoffreywoo @hvmn Do you follow a keto diet? ... | Do you follow a keto diet? If so , how do yo... |
| 5000 | Easy Chicken Fajita Foil Packets\n\nThis easy ... | Easy Chicken Fajita Foil Packets This easy tu... |
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the VADER lexicon, then build the analyzer that is used below
# to score the cleaned tweets.
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()
[nltk_data] Downloading package vader_lexicon to [nltk_data] C:\Users\DELL\AppData\Roaming\nltk_data... [nltk_data] Package vader_lexicon is already up-to-date!
# Score every cleaned tweet exactly once; each result is a dict holding the
# 'compound', 'neg', 'neu' and 'pos' values that are read below.
polarity = [sid.polarity_scores(tweet) for tweet in tweets_to_df['cleanedTweets']]

# Fan the per-tweet dicts out into one column per score, in the same column
# order as before (compound, neg, neu, pos).
for score_name in ('compound', 'neg', 'neu', 'pos'):
    tweets_to_df[score_name] = [scores[score_name] for scores in polarity]
tweets_to_df
| | Tweet | cleanedTweets | compound | neg | neu | pos |
|---|---|---|---|---|---|---|
| 0 | Powerful New Formula Triggers Fat-Burning Keto... | Powerful New Formula Triggers Fat-Burning Keto... | 0.6996 | 0.000 | 0.746 | 0.254 |
| 1 | Keto Diet vs Mediterranean Diet – Which Is Bet... | Keto Diet vs Mediterranean Diet – Which Is Bet... | 0.1531 | 0.142 | 0.679 | 0.179 |
| 2 | @UltraHottie2000 I understand and look up the ... | I understand and look up the keto diet cuz th... | 0.3818 | 0.097 | 0.709 | 0.194 |
| 3 | Eating Keto is simple 🥑🥘 Fewer carbs, more hea... | Eating Keto is simple 🥑🥘 Fewer carbs, more hea... | 0.9680 | 0.062 | 0.536 | 0.402 |
| 4 | We're so grateful to all of our customers who ... | We're so grateful to all of our customers who ... | 0.9705 | 0.039 | 0.558 | 0.404 |
| ... | ... | ... | ... | ... | ... | ... |
| 4996 | The Healthy Mediterranean Keto Diet Cookbook :... | The Healthy Mediterranean Keto Diet Cookbook :... | 0.8834 | 0.000 | 0.553 | 0.447 |
| 4997 | @mikeinspiresme so true #ketodiet #Health #tre... | so true ketodiet Health trend RecipeOfTheDay ... | 0.4754 | 0.000 | 0.781 | 0.219 |
| 4998 | Keto Diet Cookbook For Beginners: 550 Craveabl... | Keto Diet Cookbook For Beginners: 550 Craveabl... | 0.0000 | 0.000 | 1.000 | 0.000 |
| 4999 | @geoffreywoo @hvmn Do you follow a keto diet? ... | Do you follow a keto diet? If so , how do yo... | 0.0000 | 0.000 | 1.000 | 0.000 |
| 5000 | Easy Chicken Fajita Foil Packets\n\nThis easy ... | Easy Chicken Fajita Foil Packets This easy tu... | 0.9025 | 0.063 | 0.713 | 0.224 |
5001 rows × 6 columns
def _label_sentiment(compound):
    """Map a compound score to 'positive', 'neutral' or 'negative'.

    Args:
        compound: The numeric compound score of one tweet.

    Returns:
        'positive' for scores >= 0.5, 'neutral' for scores strictly between
        0 and 0.5, and 'negative' for scores <= 0.

    Raises:
        ValueError: If the score matches none of the ranges (e.g. NaN),
            instead of looping forever as the index-based loop would.
    """
    if compound >= 0.5:
        return 'positive'
    if compound > 0:
        return 'neutral'
    if compound <= 0:
        return 'negative'
    raise ValueError(f"cannot label compound score {compound!r}")


# NOTE(review): a score of exactly 0 falls in the 'negative' bucket; the
# thresholds are kept as they were so existing results do not change —
# confirm that this is the intended labelling.
predicted_value = [_label_sentiment(score) for score in tweets_to_df['compound']]
tweets_to_df['predicted sentiment'] = predicted_value
tweets_to_df
| | Tweet | cleanedTweets | compound | neg | neu | pos | predicted sentiment |
|---|---|---|---|---|---|---|---|
| 0 | Powerful New Formula Triggers Fat-Burning Keto... | Powerful New Formula Triggers Fat-Burning Keto... | 0.6996 | 0.000 | 0.746 | 0.254 | positive |
| 1 | Keto Diet vs Mediterranean Diet – Which Is Bet... | Keto Diet vs Mediterranean Diet – Which Is Bet... | 0.1531 | 0.142 | 0.679 | 0.179 | neutral |
| 2 | @UltraHottie2000 I understand and look up the ... | I understand and look up the keto diet cuz th... | 0.3818 | 0.097 | 0.709 | 0.194 | neutral |
| 3 | Eating Keto is simple 🥑🥘 Fewer carbs, more hea... | Eating Keto is simple 🥑🥘 Fewer carbs, more hea... | 0.9680 | 0.062 | 0.536 | 0.402 | positive |
| 4 | We're so grateful to all of our customers who ... | We're so grateful to all of our customers who ... | 0.9705 | 0.039 | 0.558 | 0.404 | positive |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 4996 | The Healthy Mediterranean Keto Diet Cookbook :... | The Healthy Mediterranean Keto Diet Cookbook :... | 0.8834 | 0.000 | 0.553 | 0.447 | positive |
| 4997 | @mikeinspiresme so true #ketodiet #Health #tre... | so true ketodiet Health trend RecipeOfTheDay ... | 0.4754 | 0.000 | 0.781 | 0.219 | neutral |
| 4998 | Keto Diet Cookbook For Beginners: 550 Craveabl... | Keto Diet Cookbook For Beginners: 550 Craveabl... | 0.0000 | 0.000 | 1.000 | 0.000 | negative |
| 4999 | @geoffreywoo @hvmn Do you follow a keto diet? ... | Do you follow a keto diet? If so , how do yo... | 0.0000 | 0.000 | 1.000 | 0.000 | negative |
| 5000 | Easy Chicken Fajita Foil Packets\n\nThis easy ... | Easy Chicken Fajita Foil Packets This easy tu... | 0.9025 | 0.063 | 0.713 | 0.224 | positive |
5001 rows × 7 columns
# Persist the scored tweets to an Excel workbook.
tweets_to_df.to_excel('Clean.xlsx')

# Tally the tweets per predicted label and draw the tallies as a bar chart.
value_counts = tweets_to_df['predicted sentiment'].value_counts()
value_counts.plot.bar()
<AxesSubplot:>
import seaborn as sns

# Pin every label to a colour by name. A plain list of colours is handed out
# in the order of the hue levels, so the colour of a label would depend on
# which label happens to occur first in the data.
# NOTE(review): red/negative, lightgreen/positive, blue/neutral is assumed to
# be the intended pairing — confirm.
sentiment_palette = {'negative': 'red', 'positive': 'lightgreen', 'neutral': 'blue'}
sns.countplot(
    x='predicted sentiment',
    data=tweets_to_df,
    hue='predicted sentiment',
    palette=sentiment_palette,
)
<AxesSubplot:xlabel='predicted sentiment', ylabel='count'>
# Show how many tweets fall under each polarity label.
sentiment_column = tweets_to_df['predicted sentiment']
sentiment_column.value_counts()
negative 2130 positive 1743 neutral 1128 Name: predicted sentiment, dtype: int64
! pip install NRCLex
Requirement already satisfied: NRCLex in c:\users\dell\anaconda3\python2.0\lib\site-packages (3.0.0) Requirement already satisfied: textblob in c:\users\dell\anaconda3\python2.0\lib\site-packages (from NRCLex) (0.17.1) Requirement already satisfied: nltk>=3.1 in c:\users\dell\anaconda3\python2.0\lib\site-packages (from textblob->NRCLex) (3.7) Requirement already satisfied: joblib in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (1.1.0) Requirement already satisfied: regex>=2021.8.3 in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (2022.3.15) Requirement already satisfied: click in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (8.0.4) Requirement already satisfied: tqdm in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (4.64.0) Requirement already satisfied: colorama in c:\users\dell\anaconda3\python2.0\lib\site-packages (from click->nltk>=3.1->textblob->NRCLex) (0.4.4)
import nltk
# Download the 'punkt' tokenizer data; presumably needed by the textblob-based
# tokenisation that NRCLex performs below — TODO confirm.
nltk.download('punkt')
[nltk_data] Downloading package punkt to [nltk_data] C:\Users\DELL\AppData\Roaming\nltk_data... [nltk_data] Package punkt is already up-to-date!
True
from nrclex import NRCLex

# Join all cleaned tweets into one space-separated document, analyse it with
# NRCLex and show the frequency of each affect.
all_tweets_text = ' '.join(tweets_to_df['cleanedTweets'])
text_object = NRCLex(all_tweets_text)
text_object.affect_frequencies
{'fear': 0.0867430441898527,
'anger': 0.04923253859424072,
'anticip': 0.0,
'trust': 0.10642721281019153,
'surprise': 0.055734949351970625,
'positive': 0.1991418587163268,
'negative': 0.13239262175432387,
'sadness': 0.09479364798513734,
'disgust': 0.06705887556951387,
'joy': 0.09802273632060866,
'anticipation': 0.11045251470783386}
# Show the emotion(s) with the highest affect frequency.
text_object.top_emotions
[('positive', 0.1991418587163268)]
# Turn the raw emotion counts into a two-column table: one row per emotion,
# with the emotion name in "Sentiment" and its count in "Count".
emotion_counts = list(text_object.raw_emotion_scores.items())
sentiment_scores = pd.DataFrame(emotion_counts).rename(
    columns={0: "Sentiment", 1: "Count"}
)
sentiment_scores
| | Sentiment | Count |
|---|---|---|
| 0 | positive | 4502 |
| 1 | anger | 1113 |
| 2 | negative | 2993 |
| 3 | sadness | 2143 |
| 4 | anticipation | 2497 |
| 5 | disgust | 1516 |
| 6 | fear | 1961 |
| 7 | joy | 2216 |
| 8 | surprise | 1260 |
| 9 | trust | 2406 |
import plotly.express as px

# Pie chart of the emotion counts; each slice is labelled inside with its
# share and its emotion name.
fig = px.pie(
    sentiment_scores,
    names='Sentiment',
    values='Count',
    title='Sentiment Scores',
    hover_data=['Sentiment'],
).update_traces(textposition='inside', textinfo='percent+label')
fig.show()